Algo Trading Project - Hugo Roccaro

1. Pre-requisities¶

1.1 Librairies¶

In [157]:
import os
import warnings
import numpy as np
import pandas as pd

# For PCA
import statsmodels.api as sm
from statsmodels.multivariate import pca
from ppca import PPCA

#For portfolio optimization
from scipy.optimize import minimize
import cvxpy as cp

# For plotting and formatting the plots
import matplotlib.pyplot as plt
import matplotlib.ticker as mtkr
import matplotlib.dates as mdts
import seaborn as sns
import plotly
import plotly.graph_objects as go
import plotly.express as px

warnings.filterwarnings("ignore")
plotly.offline.init_notebook_mode()

1.2 Path¶

In [2]:
os.chdir("/Users/hroccaro/Desktop/Algo trading")

1.3 Useful Functions¶

In [3]:
# Funtion to make a time series plot on one axis

def plot_timeseries_one_axis(data, y_label=None):
    """
    Plot a time series graph using a single axis.

    Args
    ----
    data : pandas DataFrame
        A DataFrame with the time series data to be plotted. Dates should be set as the index.
    y_label : str, optional
        The label for the y-axis. If None, no label is set. Default is None.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The `Figure` object which acts as the container for the plot. It can be used for further customizations or saving the plot.    
    ax : matplotlib.axes.Axes
        The `Axes` object representing the plot of the time series. It provides access to axes-level functions for further customization.
    """

    fig, ax = plt.subplots(nrows=1, ncols=1)
    ax.grid(visible=True, linestyle='dashed', lw=0.35, color='lightgray')
    data.plot(ax=ax, lw=1.)
    if y_label is not None:
        ax.set_ylabel(ylabel=y_label)
    ax.xaxis.set_major_formatter(mdts.DateFormatter('%b-%y'))
    ax.legend(loc='best', framealpha=0)

    return fig, ax
In [4]:
# Function to make a correlation heatmap

def plot_heatmap(data_df, ax=None, cbar_kw=None, cbarlabel="Correlation", annotate=True, **kwargs):
    """
    Create and annotate a heatmap from a pandas DataFrame.

    Args
    ----
    data_df : pandas DataFrame
        A DataFrame with the data to be plotted. Row and column labels are taken from the DataFrame.
    ax : matplotlib.axes.Axes, optional
        A `matplotlib.axes.Axes` instance to which the heatmap is plotted.
    cbar_kw : dict, optional
        A dictionary with arguments to `matplotlib.Figure.colorbar`.
    cbarlabel : str, optional
        The label for the colorbar.
    annotate : bool, optional
        Flag to annotate the heatmap. Default is True.
    **kwargs
        All other arguments are forwarded to `imshow`.
            
    Returns
    -------
    im : matplotlib.image.AxesImage
        The `AxesImage` object created by `imshow` representing the heatmap. It can be used for further customizations if needed.
    cbar : matplotlib.colorbar.Colorbar
        The `Colorbar` object for the heatmap. It represents the color scale of the heatmap and is attached to the provided axes. This can also be used for further customizations, like changing the colorbar properties.
    """

    if ax is None:
        ax = plt.gca()

    if cbar_kw is None:
        cbar_kw = {}

    # Extract data and labels from the DataFrame
    data = data_df.values
    row_labels = data_df.index
    col_labels = data_df.columns

    # Plot the heatmap
    im = ax.imshow(data, **kwargs)

    # Create colorbar
    cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw, shrink=0.8, format=mtkr.FuncFormatter(lambda x, _: '{:.0%}'.format(x)))
    cbar.ax.set_ylabel(cbarlabel, rotation=-90, va="bottom")

    # Show all ticks and label them with the respective list entries.
    ax.set_xticks(np.arange(data.shape[1]), labels=col_labels)
    ax.set_yticks(np.arange(data.shape[0]), labels=row_labels)

    # Let the horizontal axes labeling appear on top.
    ax.tick_params(top=True, bottom=False, labeltop=True, labelbottom=False)

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=-30, ha="right", rotation_mode="anchor")

    # Turn spines off and create white grid.
    ax.spines[:].set_visible(False)

    ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)
    ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)
    ax.grid(which="minor", color="w", linestyle='-', linewidth=3)
    ax.tick_params(which="minor", bottom=False, left=False)

    # Annotate the heatmap
    if annotate:
        # Normalizing the threshold
        threshold = im.norm(data.max())/2.

        # Setting up the format for annotations
        valfmt = mtkr.FuncFormatter(lambda x, _: '{:.0%}'.format(x))

        # Loop over the data and create a `Text` for each "pixel".
        textcolors = ("black", "white")
        for i in range(data.shape[0]):
            for j in range(data.shape[1]):
                color = textcolors[int(im.norm(data[i, j]) > threshold)]
                im.axes.text(j, i, valfmt(data[i, j], None),
                             ha="center", va="center", color=color)

    return im, cbar
In [5]:
# Function for computing portfolio return

def portfolio_returns(daily_returns, weights):
    """
    Calculate the annualized return of a portfolio based on daily returns and portfolio weights.

    Args
    ----
    daily_returns : pandas DataFrame or array_like
        The daily returns of the assets in the portfolio. Each column should represent an asset.
    weights : array_like
        Weights of the assets in the portfolio. The sum of all weights should be 1.

    Returns
    -------
    returns : float
        The annualized return of the portfolio. This is computed as the sum of the mean of daily returns 
        of each asset, weighted by the portfolio weights, and then annualized (typically using 253 trading days).
    """

    returns = (np.sum(daily_returns.mean() * weights)) * 253
    
    return returns
In [6]:
# Function for computing standard deviation of portfolio returns

def portfolio_sd(daily_returns, weights):
    """
    Calculate the standard deviation of annualized returns of a portfolio, representing the portfolio risk.

    Args
    ----
    daily_returns : pandas DataFrame or array_like
        The daily returns of the assets in the portfolio. Each column should represent an asset.
    weights : array_like
        Weights of the assets in the portfolio. The sum of all weights should be 1.

    Returns
    -------
    sd : float
        The standard deviation of the portfolio's annualized returns, which represents the portfolio risk.
        Calculated using the covariance matrix of the daily returns, weighted by the portfolio weights,
        and annualized (typically using 253 trading days).
    """

    sd = np.sqrt(np.dot(np.transpose(weights), np.dot(daily_returns.cov() * 253, weights)))
    
    return sd
In [104]:
def optimize_long_only_min_variance_portfolio(returns):
    """
    Optimize a long-only minimum variance portfolio over time, with constraints on allocation per factor and trade limits.

    Args
    ----
    returns : pandas DataFrame
        A DataFrame containing the returns of different factors. Each column represents a factor, and each row represents a time period (e.g., a month).

    Returns
    -------
    optimized_weights : pandas DataFrame
        A DataFrame containing the optimized weights for each factor in each time period. Each column represents a factor, and each row represents the optimized weights for that factor at a specific time period.

    Notes
    -----
    The optimization is conducted sequentially for each time period, starting with an equal distribution among factors.
    Constraints are applied to ensure the total allocation sums to 1, no factor exceeds a 0.25 allocation, and the portfolio remains long-only.
    Additional constraints limit the total trading movement and the movement per factor between consecutive time periods.
    """
    
    nb_factors = returns.shape[1]
    nb_months = returns.shape[0]

    # Initialize a DataFrame to store the optimized portfolio weights
    optimized_weights = pd.DataFrame(0.0, index=returns.index, columns=returns.columns, dtype=float)

    # Starting with an equal distribution among factors
    initial_weights = np.array([1.0 / nb_factors] * nb_factors)
    optimized_weights.iloc[0] = initial_weights

    for i in range(1, nb_months):
        # Calculate the covariance matrix for the current month
        covariance_matrix = np.cov(returns.iloc[:i].values, rowvar=False)
        # Symmetrize and regularize the covariance matrix
        covariance_matrix = (covariance_matrix + covariance_matrix.T) / 2 + 1e-8 * np.eye(nb_factors)

        # Define the optimization problem
        weights = cp.Variable(nb_factors)
        portfolio_variance = cp.quad_form(weights, covariance_matrix)
        objective = cp.Minimize(portfolio_variance)
        constraints = [
            cp.sum(weights) == 1,  # Total allocation must be 1
            cp.max(weights) <= 0.25,  # No factor can have more than 0.25 allocation
            cp.min(weights) >= 0,  # Long only portfolio
            cp.norm(weights - optimized_weights.iloc[i-1].values, 1) <= 0.15,  # Total trading not more than 0.15
            cp.abs(weights - optimized_weights.iloc[i-1].values) <= 0.05  # Cannot trade more than 0.05 per factor
        ]

        # Solve the problem
        prob = cp.Problem(objective, constraints)
        prob.solve()

        # Store the optimized weights
        optimized_weights.iloc[i] = weights.value

    return optimized_weights
In [149]:
def optimize_long_short_min_variance_portfolio(returns):
    """
    Optimize a long-short minimum variance portfolio over time, with constraints on allocation per factor and trade limits.

    Args
    ----
    returns : pandas DataFrame
        A DataFrame containing the returns of different factors. Each column represents a factor, and each row represents a time period (e.g., a month).

    Returns
    -------
    optimized_weights : pandas DataFrame
        A DataFrame containing the optimized weights for each factor in each time period. Each column represents a factor, and each row represents the optimized weights for that factor at a specific time period.
    """
    
    nb_factors = returns.shape[1]
    nb_months = returns.shape[0]

    # Initialize a DataFrame to store the optimized portfolio weights
    optimized_weights = pd.DataFrame(0.0, index=returns.index, columns=returns.columns, dtype=float)

    # Starting with an equal distribution among factors
    initial_weights = np.array([1.0 / nb_factors] * nb_factors)
    optimized_weights.iloc[0] = initial_weights

    for i in range(1, nb_months):
        # Calculate the covariance matrix for the current month
        covariance_matrix = np.cov(returns.iloc[:i].values, rowvar=False)
        # Symmetrize and regularize the covariance matrix
        covariance_matrix = (covariance_matrix + covariance_matrix.T) / 2 + 1e-8 * np.eye(nb_factors)

        # Define the optimization problem
        weights = cp.Variable(nb_factors)
        abs_weights = cp.Variable(nb_factors, nonneg=True)
        portfolio_variance = cp.quad_form(weights, covariance_matrix)
        objective = cp.Minimize(portfolio_variance)
        constraints = [
            cp.sum(abs_weights) == 1,
            cp.max(weights) <= 0.25,  # No factor can have more than 0.25 allocation
            cp.norm(weights - optimized_weights.iloc[i-1].values, 1) <= 0.15,  # Total trading not more than 0.15
            cp.abs(weights - optimized_weights.iloc[i-1].values) <= 0.05  # Cannot trade more than 0.05 per factor
        ]

        # Solve the problem
        prob = cp.Problem(objective, constraints)
        prob.solve(solver=cp.SCS)

        # Store the optimized weights
        optimized_weights.iloc[i] = weights.value

    return optimized_weights

2. PCA¶

2.1 Data Preproccesing¶

In [8]:
# Read Data

change_freq = '4w'
data_pca = pd.read_csv(filepath_or_buffer='_'.join(['Multi-asset PCA', 'CHANGES=' + change_freq]) + '.csv',
                      index_col=0, header=0,
                      parse_dates=True).astype(np.float64)

n_assets = data_pca.shape[1]
In [9]:
# Show the dateframe
data_pca
Out[9]:
MSCI World S&P 500 Euro Stoxx 50 Euro Stoxx Banks CDX.IG 5Y CDX.HY 5Y iTraxx Europe 5Y iTraxx Crossover 5Y Italy - Germany 10Y US 2Y ... USD 3M x 10Y ATM vol. USD 1Y x 10Y ATM vol. USD 5Y x 5Y ATM vol. USD 10Y x 20Y ATM vol. US Govt. European Govt. US Corp. Bonds European Corp. Bonds US High Yield European High Yield
2005-06-10 0.012532 0.022793 0.056718 0.047424 0.006602 0.026766 0.006048 0.035003 4.9 1.65 ... 0.00 -0.400 2.65 -0.15 0.010386 0.017674 0.013421 0.019308 0.029171 0.029893
2005-07-06 0.002255 -0.002649 0.025209 0.017424 -0.000800 0.006752 0.001608 0.012723 0.9 7.43 ... 0.95 0.950 -2.10 -1.80 0.001192 -0.004078 0.002829 -0.002100 0.009255 0.009683
2005-08-03 0.046502 0.041080 0.036400 0.041141 0.003598 0.018337 0.001442 0.003727 -2.9 24.17 ... 0.15 -1.150 -2.55 -2.20 -0.008140 -0.005336 -0.004783 -0.005385 0.015161 0.014480
2005-08-31 -0.008268 -0.020054 -0.024171 -0.019341 0.001110 -0.003190 0.000003 0.002291 1.6 -19.54 ... -2.95 0.600 1.95 2.75 0.016193 0.015940 0.016234 0.016116 0.003449 0.007675
2005-09-27 0.010540 -0.003834 0.036243 0.042163 0.000609 -0.001547 0.001839 0.008034 -0.1 25.54 ... -0.05 -0.750 -0.70 0.35 -0.011720 0.000517 -0.014359 -0.000039 -0.009020 0.001002
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-11-30 0.084752 0.081787 0.090406 0.073881 0.007581 0.030603 0.009734 0.036006 -20.9 -30.95 ... -14.10 -4.654 -2.90 -2.30 0.026953 0.022098 0.047535 0.025662 0.021258 0.032360
2022-12-28 -0.055047 -0.075548 -0.040116 0.012875 -0.003171 -0.014656 0.000119 -0.004665 17.9 4.10 ... 4.60 3.006 2.50 2.00 -0.006943 -0.045961 -0.006454 -0.019763 -0.006197 -0.008446
2023-01-25 0.068134 0.059766 0.085333 0.101296 0.006157 0.028027 0.006219 0.024940 -33.4 -22.62 ... -17.90 -21.406 -14.50 -5.50 0.028097 0.030792 0.041032 0.027126 0.035909 0.029603
2023-02-22 -0.009421 -0.006287 0.022589 0.063275 -0.001573 -0.007013 0.000186 0.003366 15.3 56.83 ... 2.40 3.907 4.30 3.40 -0.025541 -0.026568 -0.031878 -0.012932 -0.019851 0.002439
2023-03-22 -0.016522 -0.013643 -0.011182 -0.096528 -0.000676 -0.013304 -0.000156 -0.002206 -9.9 -75.66 ... 9.20 1.447 2.70 -1.10 0.029796 0.014531 0.023493 -0.000127 0.005266 -0.009522

232 rows × 40 columns

In [10]:
# Normalise the data to have zero mean and standard deviation 1; show the normalised data

data_pca_normalised = (data_pca - data_pca.mean()) / data_pca.std()
data_pca_normalised
Out[10]:
MSCI World S&P 500 Euro Stoxx 50 Euro Stoxx Banks CDX.IG 5Y CDX.HY 5Y iTraxx Europe 5Y iTraxx Crossover 5Y Italy - Germany 10Y US 2Y ... USD 3M x 10Y ATM vol. USD 1Y x 10Y ATM vol. USD 5Y x 5Y ATM vol. USD 10Y x 20Y ATM vol. US Govt. European Govt. US Corp. Bonds European Corp. Bonds US High Yield European High Yield
2005-06-10 0.167450 0.343277 1.016358 0.552594 1.021668 0.940069 0.834451 1.310608 0.148132 0.072246 ... -0.012515 -0.059194 0.499113 -0.047339 0.637123 1.088591 0.489450 1.186558 0.785765 0.764510
2005-07-06 -0.027851 -0.153893 0.436512 0.233379 -0.263237 0.130086 0.121136 0.364581 0.006282 0.343949 ... 0.066179 0.112389 -0.404934 -0.546551 -0.067827 -0.391676 -0.009991 -0.261775 0.146020 0.152702
2005-08-03 0.812959 0.700608 0.642461 0.485735 0.500203 0.598946 0.094529 -0.017407 -0.128475 1.130855 ... -0.000089 -0.154518 -0.490581 -0.667572 -0.783406 -0.477266 -0.368965 -0.483981 0.335740 0.297913
2005-08-31 -0.227815 -0.494001 -0.472199 -0.157813 0.068151 -0.272288 -0.136733 -0.078345 0.031106 -0.923843 ... -0.256881 0.067905 0.365885 0.830064 1.082408 0.970536 0.622093 0.970584 -0.040484 0.091916
2005-09-27 0.129590 -0.177045 0.639576 0.496616 -0.018805 -0.205800 0.158257 0.165498 -0.029180 1.195255 ... -0.016656 -0.103678 -0.138478 0.103937 -1.057891 -0.078988 -0.820468 -0.122343 -0.441008 -0.110103
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-11-30 1.539804 1.496070 1.636306 0.834108 1.191457 1.095391 1.426497 1.353180 -0.766798 -1.460199 ... -1.180501 -0.599871 -0.557195 -0.697827 1.907484 1.389646 2.098054 1.616380 0.531607 0.839217
2022-12-28 -1.116731 -1.578400 -0.765623 0.184979 -0.674976 -0.736341 -0.118090 -0.373734 0.609143 0.187414 ... 0.368531 0.373703 0.470564 0.603149 -0.691614 -3.241830 -0.447752 -1.456655 -0.350336 -0.396110
2023-01-25 1.224006 1.065746 1.542955 1.125815 0.944331 0.991104 0.861899 0.883335 -1.210078 -1.068626 ... -1.495278 -2.729024 -2.764973 -1.665996 1.995159 1.981227 1.791424 1.715428 1.002223 0.755746
2023-02-22 -0.249724 -0.224972 0.388311 0.721253 -0.397561 -0.426986 -0.107307 -0.032696 0.516941 2.666119 ... 0.186292 0.488219 0.813150 1.026723 -2.117699 -1.922162 -1.646589 -0.994542 -0.788922 -0.066579
2023-03-22 -0.384663 -0.368717 -0.233170 -0.979117 -0.241827 -0.681624 -0.162214 -0.269298 -0.376711 -3.561904 ... 0.749576 0.175557 0.508629 -0.334764 2.125446 0.874675 0.964376 -0.128285 0.017894 -0.428692

232 rows × 40 columns

In [11]:
# Show correlation data

data_pca_corr = data_pca_normalised.corr()
data_pca_corr.style.format(formatter='{:.0%}')
Out[11]:
  MSCI World S&P 500 Euro Stoxx 50 Euro Stoxx Banks CDX.IG 5Y CDX.HY 5Y iTraxx Europe 5Y iTraxx Crossover 5Y Italy - Germany 10Y US 2Y US 5Y US 10Y Germany 2Y Germany 5Y Germany 10Y US B/E Inflation 10Y Dollar Index EUR/USD USD/JPY Precious Metals Industrial Metals Crude Oil Agriculture S&P 500 1M ATM imp. vol. S&P 500 1M 80% imp. vol. EUR/USD 3M ATM vol. USD/JPY 3M ATM vol. AUD/USD 3M ATM vol. USD/CAD 3M ATM vol. USD 2Y x 2Y ATM vol. USD 3M x 10Y ATM vol. USD 1Y x 10Y ATM vol. USD 5Y x 5Y ATM vol. USD 10Y x 20Y ATM vol. US Govt. European Govt. US Corp. Bonds European Corp. Bonds US High Yield European High Yield
MSCI World 100% 98% 89% 71% 79% 85% 71% 75% -25% 22% 22% 17% 25% 20% 15% 60% -53% 48% 4% 33% 54% 41% 27% -81% -65% -46% -46% -69% -53% -20% -44% -41% -33% -39% -16% 7% 53% 49% 77% 74%
S&P 500 98% 100% 85% 64% 79% 84% 69% 72% -21% 23% 23% 17% 20% 16% 11% 57% -42% 38% 9% 26% 49% 35% 23% -82% -66% -44% -47% -68% -53% -19% -46% -42% -33% -37% -16% 9% 52% 51% 76% 71%
Euro Stoxx 50 89% 85% 100% 86% 76% 79% 77% 79% -35% 28% 30% 27% 28% 24% 22% 52% -33% 30% 12% 17% 44% 36% 18% -71% -59% -43% -43% -55% -46% -15% -39% -35% -29% -35% -26% 6% 38% 46% 66% 69%
Euro Stoxx Banks 71% 64% 86% 100% 63% 65% 68% 66% -44% 33% 36% 35% 38% 39% 36% 49% -35% 34% 15% 7% 45% 41% 20% -51% -42% -38% -35% -39% -41% -12% -29% -30% -27% -31% -36% -9% 19% 28% 55% 60%
CDX.IG 5Y 79% 79% 76% 63% 100% 83% 90% 78% -30% 33% 31% 26% 24% 24% 22% 51% -30% 28% 12% 17% 36% 26% 13% -71% -61% -43% -43% -53% -49% -25% -41% -41% -34% -28% -27% 2% 40% 42% 72% 70%
CDX.HY 5Y 85% 84% 79% 65% 83% 100% 79% 89% -24% 28% 27% 23% 21% 19% 16% 51% -34% 29% 5% 19% 46% 37% 18% -70% -56% -45% -43% -63% -52% -19% -46% -41% -31% -35% -22% 7% 51% 52% 81% 76%
iTraxx Europe 5Y 71% 69% 77% 68% 90% 79% 100% 87% -46% 32% 34% 34% 27% 28% 28% 47% -25% 23% 14% 9% 35% 24% 8% -60% -51% -51% -41% -48% -51% -12% -38% -32% -22% -21% -33% 4% 33% 44% 65% 69%
iTraxx Crossover 5Y 75% 72% 79% 66% 78% 89% 87% 100% -34% 31% 34% 33% 27% 23% 21% 49% -26% 21% 13% 15% 45% 36% 13% -63% -52% -54% -45% -59% -53% -6% -41% -29% -16% -24% -32% 6% 41% 51% 75% 76%
Italy - Germany 10Y -25% -21% -35% -44% -30% -24% -46% -34% 100% -5% -12% -18% -14% -21% -22% -21% 25% -29% -13% -2% -16% -10% -2% 17% 15% 25% 8% 10% 19% -1% 5% 7% 5% 2% 16% -15% -2% -12% -14% -23%
US 2Y 22% 23% 28% 33% 33% 28% 32% 31% -5% 100% 89% 76% 59% 62% 60% 34% 19% -15% 53% -16% 18% 25% 13% -30% -25% -24% -26% -22% -30% 15% -10% -3% 1% -5% -81% -51% -36% -18% 14% 18%
US 5Y 22% 23% 30% 36% 31% 27% 34% 34% -12% 89% 100% 94% 59% 70% 76% 43% 16% -13% 56% -19% 20% 28% 15% -28% -22% -25% -18% -19% -29% 36% 6% 17% 18% -2% -95% -63% -45% -26% 14% 19%
US 10Y 17% 17% 27% 35% 26% 23% 34% 33% -18% 76% 94% 100% 54% 69% 81% 46% 14% -11% 52% -20% 22% 30% 17% -20% -16% -27% -13% -12% -27% 46% 13% 29% 31% 6% -98% -66% -52% -30% 11% 17%
Germany 2Y 25% 20% 28% 38% 24% 21% 27% 27% -14% 59% 59% 54% 100% 89% 74% 33% -23% 30% 32% 2% 29% 29% 32% -21% -20% -27% -14% -21% -25% 16% -1% 2% 2% -12% -54% -66% -23% -34% 14% 18%
Germany 5Y 20% 16% 24% 39% 24% 19% 28% 23% -21% 62% 70% 69% 89% 100% 93% 37% -18% 25% 37% -4% 25% 25% 29% -17% -15% -21% -7% -12% -21% 27% 8% 13% 12% -7% -68% -80% -38% -48% 7% 11%
Germany 10Y 15% 11% 22% 36% 22% 16% 28% 21% -22% 60% 76% 81% 74% 93% 100% 39% -8% 14% 42% -10% 21% 26% 27% -13% -11% -20% -5% -6% -18% 37% 13% 23% 22% 3% -79% -85% -46% -50% 5% 10%
US B/E Inflation 10Y 60% 57% 52% 49% 51% 51% 47% 49% -21% 34% 43% 46% 33% 37% 39% 100% -32% 30% 15% 29% 53% 49% 40% -50% -40% -36% -29% -42% -44% 4% -21% -12% -1% -17% -46% -21% 14% 20% 57% 55%
Dollar Index -53% -42% -33% -35% -30% -34% -25% -26% 25% 19% 16% 14% -23% -18% -8% -32% 100% -97% 46% -55% -40% -31% -33% 31% 26% 23% 5% 36% 14% 21% 11% 22% 24% 26% -17% -1% -37% -12% -32% -30%
EUR/USD 48% 38% 30% 34% 28% 29% 23% 21% -29% -15% -13% -11% 30% 25% 14% 30% -97% 100% -34% 49% 37% 28% 32% -31% -27% -20% -1% -32% -11% -19% -8% -21% -24% -28% 14% -8% 29% -1% 23% 22%
USD/JPY 4% 9% 12% 15% 12% 5% 14% 13% -13% 53% 56% 52% 32% 37% 42% 15% 46% -34% 100% -36% 1% 10% 0% -17% -20% -16% -14% -9% -24% 25% 2% 11% 16% 3% -53% -35% -35% -20% 3% 6%
Precious Metals 33% 26% 17% 7% 17% 19% 9% 15% -2% -16% -19% -20% 2% -4% -10% 29% -55% 49% -36% 100% 45% 15% 28% -23% -19% -6% 3% -27% -7% -16% 1% -9% -12% -9% 22% 9% 35% 13% 27% 20%
Industrial Metals 54% 49% 44% 45% 36% 46% 35% 45% -16% 18% 20% 22% 29% 25% 21% 53% -40% 37% 1% 45% 100% 48% 43% -40% -35% -41% -27% -43% -45% 1% -21% -15% -6% -14% -22% -11% 21% 19% 53% 49%
Crude Oil 41% 35% 36% 41% 26% 37% 24% 36% -10% 25% 28% 30% 29% 25% 26% 49% -31% 28% 10% 15% 48% 100% 35% -31% -30% -23% -22% -29% -32% 8% -11% -5% 3% -10% -32% -18% 8% 13% 40% 38%
Agriculture 27% 23% 18% 20% 13% 18% 8% 13% -2% 13% 15% 17% 32% 29% 27% 40% -33% 32% 0% 28% 43% 35% 100% -19% -21% -21% -11% -22% -19% 8% 3% 3% 6% 0% -18% -23% 5% -3% 27% 26%
S&P 500 1M ATM imp. vol. -81% -82% -71% -51% -71% -70% -60% -63% 17% -30% -28% -20% -21% -17% -13% -50% 31% -31% -17% -23% -40% -31% -19% 100% 84% 47% 52% 68% 54% 24% 48% 45% 35% 39% 22% -0% -38% -34% -61% -53%
S&P 500 1M 80% imp. vol. -65% -66% -59% -42% -61% -56% -51% -52% 15% -25% -22% -16% -20% -15% -11% -40% 26% -27% -20% -19% -35% -30% -21% 84% 100% 46% 47% 63% 49% 23% 44% 42% 32% 36% 17% 0% -33% -27% -52% -45%
EUR/USD 3M ATM vol. -46% -44% -43% -38% -43% -45% -51% -54% 25% -24% -25% -27% -27% -21% -20% -36% 23% -20% -16% -6% -41% -23% -21% 47% 46% 100% 59% 72% 79% 3% 42% 23% 6% 11% 25% -2% -30% -33% -57% -55%
USD/JPY 3M ATM vol. -46% -47% -43% -35% -43% -43% -41% -45% 8% -26% -18% -13% -14% -7% -5% -29% 5% -1% -14% 3% -27% -22% -11% 52% 47% 59% 100% 61% 62% 25% 49% 41% 26% 20% 15% -9% -30% -35% -49% -46%
AUD/USD 3M ATM vol. -69% -68% -55% -39% -53% -63% -48% -59% 10% -22% -19% -12% -21% -12% -6% -42% 36% -32% -9% -27% -43% -29% -22% 68% 63% 72% 61% 100% 77% 17% 43% 33% 21% 24% 10% -8% -55% -42% -65% -59%
USD/CAD 3M ATM vol. -53% -53% -46% -41% -49% -52% -51% -53% 19% -30% -29% -27% -25% -21% -18% -44% 14% -11% -24% -7% -45% -32% -19% 54% 49% 79% 62% 77% 100% 14% 45% 33% 16% 18% 26% -3% -30% -32% -59% -53%
USD 2Y x 2Y ATM vol. -20% -19% -15% -12% -25% -19% -12% -6% -1% 15% 36% 46% 16% 27% 37% 4% 21% -19% 25% -16% 1% 8% 8% 24% 23% 3% 25% 17% 14% 100% 55% 80% 84% 44% -41% -36% -35% -26% -18% -10%
USD 3M x 10Y ATM vol. -44% -46% -39% -29% -41% -46% -38% -41% 5% -10% 6% 13% -1% 8% 13% -21% 11% -8% 2% 1% -21% -11% 3% 48% 44% 42% 49% 43% 45% 55% 100% 85% 51% 34% -8% -18% -39% -38% -45% -36%
USD 1Y x 10Y ATM vol. -41% -42% -35% -30% -41% -41% -32% -29% 7% -3% 17% 29% 2% 13% 23% -12% 22% -21% 11% -9% -15% -5% 3% 45% 42% 23% 41% 33% 33% 80% 85% 100% 82% 55% -23% -27% -38% -34% -38% -28%
USD 5Y x 5Y ATM vol. -33% -33% -29% -27% -34% -31% -22% -16% 5% 1% 18% 31% 2% 12% 22% -1% 24% -24% 16% -12% -6% 3% 6% 35% 32% 6% 26% 21% 16% 84% 51% 82% 100% 69% -27% -25% -29% -23% -24% -18%
USD 10Y x 20Y ATM vol. -39% -37% -35% -31% -28% -35% -21% -24% 2% -5% -2% 6% -12% -7% 3% -17% 26% -28% 3% -9% -14% -10% 0% 39% 36% 11% 20% 24% 18% 44% 34% 55% 69% 100% -4% -3% -16% -13% -26% -21%
US Govt. -16% -16% -26% -36% -27% -22% -33% -32% 16% -81% -95% -98% -54% -68% -79% -46% -17% 14% -53% 22% -22% -32% -18% 22% 17% 25% 15% 10% 26% -41% -8% -23% -27% -4% 100% 67% 55% 30% -9% -15%
European Govt. 7% 9% 6% -9% 2% 7% 4% 6% -15% -51% -63% -66% -66% -80% -85% -21% -1% -8% -35% 9% -11% -18% -23% -0% 0% -2% -9% -8% -3% -36% -18% -27% -25% -3% 67% 100% 55% 71% 15% 16%
US Corp. Bonds 53% 52% 38% 19% 40% 51% 33% 41% -2% -36% -45% -52% -23% -38% -46% 14% -37% 29% -35% 35% 21% 8% 5% -38% -33% -30% -30% -55% -30% -35% -39% -38% -29% -16% 55% 55% 100% 77% 68% 62%
European Corp. Bonds 49% 51% 46% 28% 42% 52% 44% 51% -12% -18% -26% -30% -34% -48% -50% 20% -12% -1% -20% 13% 19% 13% -3% -34% -27% -33% -35% -42% -32% -26% -38% -34% -23% -13% 30% 71% 77% 100% 64% 69%
US High Yield 77% 76% 66% 55% 72% 81% 65% 75% -14% 14% 14% 11% 14% 7% 5% 57% -32% 23% 3% 27% 53% 40% 27% -61% -52% -57% -49% -65% -59% -18% -45% -38% -24% -26% -9% 15% 68% 64% 100% 93%
European High Yield 74% 71% 69% 60% 70% 76% 69% 76% -23% 18% 19% 17% 18% 11% 10% 55% -30% 22% 6% 20% 49% 38% 26% -53% -45% -55% -46% -59% -53% -10% -36% -28% -18% -21% -15% 16% 62% 69% 93% 100%
In [12]:
# Display the correlation heatmap

plt.figure(figsize=(16, 16))
plot_heatmap(data_pca_corr, annotate=False);
No description has been provided for this image

2.2 Analysis¶

In [13]:
# Perform PCA; obtain the loading matrix, scores, and explained variance

pca_model = pca.PCA(data=data_pca_normalised.values, standardize=True, normalize=False)
loadings = pca_model.loadings
scores = pca_model.scores
explained_variance = pca_model.eigenvals
explained_variance_ratio = explained_variance / explained_variance.sum() * 100
cumulative_explained_variance_ratio = pca_model.rsquare * 100
In [14]:
# Convert the numpy array loadings and scores to a pandas DataFrame for ease of viewing

scores_df = pd.DataFrame(
    index=data_pca_normalised.index,
    columns=[''.join(['PC', str(i + 1)]) for i in range(n_assets)],
    data=scores)

loadings_df = pd.DataFrame(
    index=data_pca_normalised.columns,
    columns=[''.join(['PC', str(j + 1)]) for j in range(n_assets)],
    data=loadings)

loadings_df.iloc[:, :6].style.format(formatter='{:+.2f}').map(func=lambda x: 'color:red' if x < 0. else 'color:blue')
Out[14]:
  PC1 PC2 PC3 PC4 PC5 PC6
MSCI World -0.25 -0.05 -0.07 +0.02 -0.08 +0.12
S&P 500 -0.24 -0.05 -0.01 +0.03 -0.07 +0.15
Euro Stoxx 50 -0.23 -0.01 +0.02 +0.03 -0.22 +0.06
Euro Stoxx Banks -0.20 +0.05 -0.03 -0.03 -0.26 -0.06
CDX.IG 5Y -0.23 -0.01 +0.06 -0.00 -0.21 +0.04
CDX.HY 5Y -0.24 -0.03 +0.04 +0.05 -0.14 +0.12
iTraxx Europe 5Y -0.22 +0.01 +0.08 +0.06 -0.27 -0.13
iTraxx Crossover 5Y -0.23 +0.00 +0.07 +0.12 -0.16 -0.02
Italy - Germany 10Y +0.09 -0.03 +0.05 -0.04 +0.35 +0.53
US 2Y -0.10 +0.25 +0.16 -0.11 +0.04 +0.13
US 5Y -0.10 +0.30 +0.12 -0.02 -0.01 +0.11
US 10Y -0.09 +0.31 +0.07 +0.04 -0.04 +0.04
Germany 2Y -0.10 +0.23 -0.14 -0.17 +0.05 -0.13
Germany 5Y -0.09 +0.28 -0.14 -0.16 -0.02 -0.13
Germany 10Y -0.08 +0.31 -0.10 -0.09 -0.04 -0.10
US B/E Inflation 10Y -0.18 +0.09 -0.11 +0.08 +0.06 +0.18
Dollar Index +0.12 +0.08 +0.44 +0.07 +0.04 +0.16
EUR/USD -0.11 -0.05 -0.44 -0.12 -0.05 -0.20
USD/JPY -0.04 +0.20 +0.26 -0.01 +0.04 -0.01
Precious Metals -0.07 -0.09 -0.35 +0.04 +0.11 +0.16
Industrial Metals -0.16 +0.03 -0.20 +0.09 +0.18 +0.09
Crude Oil -0.13 +0.07 -0.14 +0.08 +0.12 +0.20
Agriculture -0.08 +0.06 -0.26 +0.06 +0.26 +0.13
S&P 500 1M ATM imp. vol. +0.22 +0.03 -0.04 +0.07 -0.06 -0.12
S&P 500 1M 80% imp. vol. +0.19 +0.02 -0.05 +0.09 -0.13 -0.07
EUR/USD 3M ATM vol. +0.17 -0.01 -0.08 -0.10 -0.30 +0.42
USD/JPY 3M ATM vol. +0.16 +0.03 -0.19 +0.03 -0.29 +0.16
AUD/USD 3M ATM vol. +0.20 +0.05 -0.02 -0.06 -0.31 +0.14
USD/CAD 3M ATM vol. +0.19 -0.01 -0.12 -0.05 -0.32 +0.26
USD 2Y x 2Y ATM vol. +0.06 +0.21 -0.05 +0.38 -0.02 -0.00
USD 3M x 10Y ATM vol. +0.14 +0.13 -0.18 +0.22 -0.15 +0.07
USD 1Y x 10Y ATM vol. +0.13 +0.18 -0.12 +0.37 -0.05 +0.01
USD 5Y x 5Y ATM vol. +0.09 +0.17 -0.05 +0.43 +0.07 -0.06
USD 10Y x 20Y ATM vol. +0.10 +0.08 +0.00 +0.36 +0.07 -0.16
US Govt. +0.09 -0.31 -0.10 -0.01 +0.02 -0.08
European Govt. +0.01 -0.29 +0.12 +0.17 -0.10 -0.09
US Corp. Bonds -0.12 -0.26 -0.04 +0.17 +0.06 +0.01
European Corp. Bonds -0.13 -0.22 +0.14 +0.25 -0.06 +0.06
US High Yield -0.22 -0.07 +0.02 +0.16 +0.05 +0.11
European High Yield -0.22 -0.05 +0.03 +0.20 -0.04 +0.06
In [15]:
# Creating a pandas DataFrame for easy visualization of explained variance
var_explained_df = pd.DataFrame(
    {
        "Dim": [x for x in range(len(explained_variance) + 1)],
        "% var. explained": np.insert(explained_variance_ratio, 0, 0),
        "% cum. var. explained": cumulative_explained_variance_ratio
    }
).set_index('Dim')
var_explained_df
Out[15]:
% var. explained % cum. var. explained
Dim
0 0.000000 0.000000
1 34.950111 34.950111
2 19.612252 54.562363
3 7.861521 62.423884
4 6.735117 69.159001
5 4.431694 73.590695
6 3.200052 76.790747
7 2.573463 79.364210
8 2.358504 81.722714
9 1.910359 83.633073
10 1.750775 85.383848
11 1.631243 87.015092
12 1.431360 88.446452
13 1.307656 89.754108
14 1.282720 91.036828
15 1.145210 92.182038
16 1.060773 93.242811
17 0.900256 94.143067
18 0.781254 94.924322
19 0.754358 95.678680
20 0.657798 96.336478
21 0.631121 96.967599
22 0.457441 97.425039
23 0.391695 97.816735
24 0.321597 98.138332
25 0.290392 98.428724
26 0.264747 98.693471
27 0.228898 98.922369
28 0.209170 99.131539
29 0.162305 99.293845
30 0.143251 99.437096
31 0.128480 99.565576
32 0.097474 99.663050
33 0.093696 99.756746
34 0.060678 99.817424
35 0.057775 99.875199
36 0.041031 99.916229
37 0.034187 99.950416
38 0.020866 99.971283
39 0.017745 99.989028
40 0.010972 100.000000
In [16]:
# Find the first dimension where % cum. var. explained exceeds 75%
dim_75 = var_explained_df[var_explained_df['% cum. var. explained'] >= 75].index[0]

# Find the first dimension where % cum. var. explained exceeds 80%
dim_80 = var_explained_df[var_explained_df['% cum. var. explained'] >= 80].index[0]

point_index_75 = next(i for i, y in enumerate(var_explained_df['% cum. var. explained']) if 75 <= y)
point_index_80 = next(i for i, y in enumerate(var_explained_df['% cum. var. explained']) if 80 <= y)
In [17]:
plt.figure(figsize=(12,7))
plt.plot(range(len(explained_variance) + 1), var_explained_df['% cum. var. explained'], marker = 'x', linestyle = '--', color ='black')

plt.fill_between(range(len(explained_variance) + 1), 75, 80, color='yellow', alpha=0.5)
plt.fill_between(range(len(explained_variance) + 1), 80, var_explained_df['% cum. var. explained'].max()+5, color='lightblue', alpha=0.5)

point_index_75 = next(i for i, y in enumerate(var_explained_df['% cum. var. explained']) if 75 <= y)
point_index_80 = next(i for i, y in enumerate(var_explained_df['% cum. var. explained']) if 80 <= y)


plt.scatter(point_index_75, var_explained_df['% cum. var. explained'][point_index_75], color='blue', edgecolor='black', zorder=5, s=80)
plt.scatter(point_index_80, var_explained_df['% cum. var. explained'][point_index_80], color='blue', edgecolor='black', zorder=5, s=80)

plt.hlines(y=var_explained_df['% cum. var. explained'][point_index_75], xmin=0, xmax=dim_75, colors='black', linestyles='dashed', linewidth=0.5, label='75% Variance Explained')
plt.vlines(x=dim_75, ymin=0, ymax=var_explained_df['% cum. var. explained'][point_index_75], colors='black', linestyles='dashed', linewidth=0.5)
plt.hlines(y=var_explained_df['% cum. var. explained'][point_index_80], xmin=0, xmax=dim_80, colors='black', linestyles='dashed', linewidth=0.5, label='80% Variance Explained')
plt.vlines(x=dim_80, ymin=0, ymax=var_explained_df['% cum. var. explained'][point_index_80], colors='black', linestyles='dashed', linewidth=0.5)

plt.ylim(0, 105)
plt.xlim(0, len(explained_variance))
plt.yticks(np.arange(0, 105, 5))
plt.xticks(np.arange(0, len(explained_variance) + 1, 1))

plt.title("Explained Variance by Components")
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
Out[17]:
Text(0, 0.5, 'Cumulative Explained Variance')
No description has been provided for this image
In [18]:
# Display the results
print(f"The number of components needed to explain 75% of the variance is: {dim_75}\n"
      f"The number of components needed to explain 80% of the variance is: {dim_80}")
The number of components needed to explain 75% of the variance is: 6
The number of components needed to explain 80% of the variance is: 8
In [19]:
rsquare_df = pd.DataFrame(index=data_pca_normalised.columns, columns=['PC' + str(i) for i in range(1, 7)])

for factor in data_pca_normalised.columns:
    for i in range(6):
        model = sm.OLS(data_pca_normalised[factor], sm.add_constant(scores_df.iloc[:, i])).fit()
        rsquare_df.at[factor, 'PC' + str(i + 1)] = model.rsquared

# Display R² Dataframe
rsquare_df
Out[19]:
PC1 PC2 PC3 PC4 PC5 PC6
MSCI World 0.847734 0.018132 0.015373 0.001434 0.011094 0.017129
S&P 500 0.791634 0.019967 0.00022 0.001818 0.007687 0.030013
Euro Stoxx 50 0.747764 0.000497 0.001075 0.001861 0.088351 0.00425
Euro Stoxx Banks 0.574965 0.017899 0.002831 0.002532 0.120362 0.004685
CDX.IG 5Y 0.719317 0.001376 0.011846 0.00001 0.075298 0.002283
CDX.HY 5Y 0.779932 0.009609 0.004181 0.007669 0.035685 0.01767
iTraxx Europe 5Y 0.66557 0.001616 0.022407 0.010025 0.130195 0.022045
iTraxx Crossover 5Y 0.726087 0.000088 0.016429 0.041374 0.047959 0.000557
Italy - Germany 10Y 0.10482 0.006027 0.007327 0.004699 0.221695 0.357315
US 2Y 0.147492 0.480846 0.083955 0.033085 0.003437 0.020123
US 5Y 0.142163 0.702009 0.042211 0.001648 0.000329 0.014355
US 10Y 0.108046 0.772846 0.017462 0.004516 0.002325 0.002426
Germany 2Y 0.144694 0.405909 0.062542 0.074811 0.004935 0.020125
Germany 5Y 0.109984 0.623796 0.060162 0.066282 0.000839 0.023122
Germany 10Y 0.082478 0.743492 0.028793 0.021847 0.003587 0.01257
US B/E Inflation 10Y 0.444796 0.060727 0.040191 0.01617 0.006221 0.043129
Dollar Index 0.192258 0.05414 0.613654 0.013217 0.002187 0.030921
EUR/USD 0.161131 0.023387 0.614468 0.040719 0.004057 0.05095
USD/JPY 0.022078 0.314967 0.213099 0.000244 0.002974 0.000039
Precious Metals 0.065329 0.070289 0.381549 0.005353 0.019842 0.032732
Industrial Metals 0.352014 0.005616 0.122303 0.021939 0.059225 0.011116
Crude Oil 0.22381 0.040065 0.063522 0.016739 0.024498 0.050765
Agriculture 0.093311 0.032897 0.206967 0.0089 0.116779 0.02036
S&P 500 1M ATM imp. vol. 0.666423 0.005311 0.005788 0.014132 0.00561 0.019025
S&P 500 1M 80% imp. vol. 0.506817 0.004585 0.006513 0.021419 0.027947 0.005879
EUR/USD 3M ATM vol. 0.42058 0.001755 0.019578 0.024658 0.15485 0.229027
USD/JPY 3M ATM vol. 0.346118 0.007402 0.116504 0.002239 0.144285 0.034107
AUD/USD 3M ATM vol. 0.575299 0.018893 0.001686 0.010511 0.169019 0.025335
USD/CAD 3M ATM vol. 0.48517 0.001017 0.047605 0.005852 0.182719 0.089845
USD 2Y x 2Y ATM vol. 0.047473 0.356244 0.009461 0.378987 0.000768 0.000007
USD 3M x 10Y ATM vol. 0.277087 0.125866 0.102971 0.130892 0.038537 0.006659
USD 1Y x 10Y ATM vol. 0.219929 0.240691 0.043299 0.373238 0.004693 0.000139
USD 5Y x 5Y ATM vol. 0.116821 0.224435 0.009351 0.500718 0.008305 0.00528
USD 10Y x 20Y ATM vol. 0.152617 0.047261 0.000036 0.340525 0.007533 0.033055
US Govt. 0.10792 0.767181 0.028529 0.00009 0.000967 0.007538
European Govt. 0.000543 0.679288 0.047878 0.076168 0.01787 0.011189
US Corp. Bonds 0.214779 0.517772 0.004407 0.073595 0.005459 0.00006
European Corp. Bonds 0.223653 0.377516 0.064352 0.170729 0.007107 0.00514
US High Yield 0.706725 0.043526 0.001479 0.064874 0.005255 0.01512
European High Yield 0.664684 0.019961 0.002604 0.108529 0.002195 0.003937
In [20]:
# Finding the best factors for PC1, PC2, and PC3

best_factors = rsquare_df[['PC1', 'PC2', 'PC3']].astype(float).idxmax()
best_factors_df = pd.DataFrame(best_factors).T.rename(index={0: 'Best Factor'})

best_factors_df
Out[20]:
PC1 PC2 PC3
Best Factor MSCI World US 10Y EUR/USD
In [21]:
# Loop through the first three principal components
for pc in ['PC1', 'PC2', 'PC3']:
    best_factor = best_factors_df.at['Best Factor', pc]

    # Prepare data for plotting
    plot_data = pd.DataFrame()
    plot_data[best_factor] = data_pca_normalised[best_factor].cumsum()
    plot_data[pc] = scores_df[pc].cumsum()

    # Use the custom function for plotting
    fig, ax = plot_timeseries_one_axis(plot_data);
    ax.set_title(f"Cumulative Sum of {best_factor} and {pc}")
    ax.legend()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image

3. Study of the Traditional Assets¶

In [22]:
# Read Data

change_freq = '4w'
data_trad = pd.read_csv(filepath_or_buffer='_'.join(['Traditional Assets_Multi-asset PCA', 'CHANGES=' + change_freq]) + '.csv',
                      index_col=0, header=0,
                      parse_dates=True).astype(np.float64)

n_assets_trad = data_trad.shape[1]
In [23]:
data_trad
Out[23]:
US Govt. Bonds European Govt. Bonds US IG Corp. Bonds European IG Corp. Bonds US High Yield European High Yield MSCI World S&P 500 Crude Oil Industrial Metals Precious Metals
2005-06-10 0.010386 0.017674 0.013421 0.019308 0.029171 0.029893 0.014866 0.024410 0.028562 0.012891 0.005019
2005-07-06 0.001192 -0.004078 0.002829 -0.002100 0.009255 0.009683 0.003253 -0.001373 0.112833 -0.011761 -0.022402
2005-08-03 -0.008140 -0.005336 -0.004783 -0.005385 0.015161 0.014480 0.047262 0.042098 -0.020653 0.067694 0.033755
2005-08-31 0.016193 0.015940 0.016234 0.016116 0.003449 0.007675 -0.006708 -0.018139 0.105520 0.000730 -0.027033
2005-09-27 -0.011720 0.000517 -0.014359 -0.000039 -0.009020 0.001002 0.011620 -0.002921 -0.064567 0.019788 0.063330
... ... ... ... ... ... ... ... ... ... ... ...
2022-11-30 0.026953 0.022098 0.047535 0.025662 0.021258 0.032360 0.086199 0.083794 -0.098184 0.092527 0.066374
2022-12-28 -0.006943 -0.045961 -0.006454 -0.019763 -0.006197 -0.008446 -0.054265 -0.074297 -0.023005 0.026770 0.045765
2023-01-25 0.028097 0.030792 0.041032 0.027126 0.035909 0.029603 0.068924 0.060770 0.013441 0.066096 0.052185
2023-02-22 -0.025541 -0.026568 -0.031878 -0.012932 -0.019851 0.002439 -0.008393 -0.004677 -0.081716 -0.072963 -0.070958
2023-03-22 0.029796 0.014531 0.023493 -0.000127 0.005266 -0.009522 -0.014729 -0.011974 -0.045359 -0.064751 0.053697

232 rows × 11 columns

In [24]:
# Normalise the data to have zero mean and standard deviation 1; show the normalised data

data_trad_normalised = (data_trad - data_trad.mean()) / data_trad.std()
data_trad_normalised
Out[24]:
US Govt. Bonds European Govt. Bonds US IG Corp. Bonds European IG Corp. Bonds US High Yield European High Yield MSCI World S&P 500 Crude Oil Industrial Metals Precious Metals
2005-06-10 0.637123 1.088591 0.489450 1.186558 0.785765 0.764510 0.183719 0.345216 0.322210 0.165716 0.007486
2005-07-06 -0.067827 -0.391676 -0.009991 -0.261775 0.146020 0.152702 -0.037128 -0.159374 1.077765 -0.203723 -0.485432
2005-08-03 -0.783406 -0.477266 -0.368965 -0.483981 0.335740 0.297913 0.799827 0.691387 -0.119036 0.987022 0.524058
2005-08-31 1.082408 0.970536 0.622093 0.970584 -0.040484 0.091916 -0.226582 -0.487514 1.012196 -0.016538 -0.568689
2005-09-27 -1.057891 -0.078988 -0.820468 -0.122343 -0.441008 -0.110103 0.121989 -0.189670 -0.512761 0.269072 1.055710
... ... ... ... ... ... ... ... ... ... ... ...
2022-11-30 1.907484 1.389646 2.098054 1.616380 0.531607 0.839217 1.540318 1.507397 -0.814159 1.359171 1.110415
2022-12-28 -0.691614 -3.241830 -0.447752 -1.456655 -0.350336 -0.396110 -1.131010 -1.586561 -0.140126 0.373712 0.739942
2023-01-25 1.995159 1.981227 1.791424 1.715428 1.002223 0.755746 1.211784 1.056797 0.186639 0.963064 0.855358
2023-02-22 -2.117699 -1.922162 -1.646589 -0.994542 -0.788922 -0.066579 -0.258620 -0.224048 -0.666516 -1.120919 -1.358291
2023-03-22 2.125446 0.874675 0.964376 -0.128285 0.017894 -0.428692 -0.379119 -0.366858 -0.340540 -0.997850 0.882534

232 rows × 11 columns

In [25]:
pca_trad = pca.PCA(data=data_trad_normalised.values, standardize=True, normalize=False)
loadings_trad = pca_trad.loadings
scores_trad = pca_trad.scores
explained_variance_trad = pca_trad.eigenvals
explained_variance_ratio_trad = explained_variance_trad / explained_variance_trad.sum() * 100
cumulative_explained_variance_ratio_trad = np.cumsum(explained_variance_ratio_trad)
In [26]:
loadings_trad_df = pd.DataFrame(
    index=data_trad_normalised.columns,
    columns=[''.join(['PC', str(j + 1)]) for j in range(n_assets_trad)],
    data=loadings_trad)

scores_trad_df = pd.DataFrame(
    index=data_trad_normalised.index,
    columns=[''.join(['PC', str(i + 1)]) for i in range(n_assets_trad)],
    data=scores_trad)
In [27]:
var_explained_trad_df = pd.DataFrame(
    {
        "Dim": [x for x in range(1, len(explained_variance_trad) + 1)],
        "% var. explained": explained_variance_ratio_trad,
        "% cum. var. explained": cumulative_explained_variance_ratio_trad
    }
).set_index('Dim')
var_explained_trad_df
Out[27]:
% var. explained % cum. var. explained
Dim
1 46.628907 46.628907
2 22.968751 69.597658
3 10.418276 80.015934
4 6.599419 86.615353
5 4.322212 90.937565
6 3.884804 94.822369
7 3.014014 97.836383
8 1.055166 98.891549
9 0.646350 99.537898
10 0.346765 99.884663
11 0.115337 100.000000
In [28]:
# Compute the correlation of each asset with PC1 and PC2
correlation_trad_pc1 = data_trad_normalised.corrwith(scores_trad_df['PC1'])
correlation_trad_pc2 = data_trad_normalised.corrwith(scores_trad_df['PC2'])

# Create a DataFrame to display the correlations
correlation_trad_df = pd.DataFrame({
    'PC1': correlation_trad_pc1,
    'PC2': correlation_trad_pc2
}, index=data_trad_normalised.columns)

correlation_trad_df.style.format(formatter='{:+.2f}').map(func=lambda x: 'color:red' if x < 0. else 'color:blue')
Out[28]:
  PC1 PC2
US Govt. Bonds -0.05 +0.88
European Govt. Bonds -0.32 +0.83
US IG Corp. Bonds -0.77 +0.52
European IG Corp. Bonds -0.76 +0.48
US High Yield -0.92 -0.12
European High Yield -0.89 -0.12
MSCI World -0.88 -0.25
S&P 500 -0.86 -0.22
Crude Oil -0.42 -0.50
Industrial Metals -0.59 -0.43
Precious Metals -0.41 +0.03
In [29]:
plt.figure(figsize=(10, 6))
plt.scatter(correlation_trad_df['PC1'], correlation_trad_df['PC2'], alpha=0.7)

# Ajouter les noms des points
for i in correlation_trad_df.index:
    plt.text(correlation_trad_df.loc[i, 'PC1'], correlation_trad_df.loc[i, 'PC2'], i)

# Ajouter des titres et des étiquettes
plt.title("Correlation with PC1 and PC2")
plt.xlabel("Correlation with PC1")
plt.ylabel("Correlation with PC2")
plt.grid(True)
plt.show()
No description has been provided for this image

4. Systematic Strategies¶

In [30]:
# Read Data

change_freq = '4w'
data_syst = pd.read_csv(filepath_or_buffer='_'.join(['Systematic Strategies_Multi-asset PCA', 'CHANGES=' + change_freq]) + '.csv',
                      index_col=0, header=0,
                      parse_dates=True).astype(np.float64)

n_assets_syst = data_syst.shape[1]
In [31]:
data_syst
Out[31]:
Global Quality Income vs. Index Global Equity Machine Learning European Value vs. Index Commodity Congestion Multi-asset Trend Repo Carry Long Rates Vol. Tail Hedge -SDV Tail Hedge - SCV Tail Hedge - Dynamic Put Ratio Put Spread Intraday Trend Following Strong vs. Weak Balance Sheets FX Carry FX Value Short Weekly Tail Puts
2005-06-10 0.051755 NaN 0.014358 0.024636 0.026185 NaN NaN -0.3152 NaN NaN -0.005487 NaN NaN 0.011203 0.017098 NaN
2005-07-06 0.013968 NaN 0.008676 0.003710 0.011092 NaN NaN -0.9980 NaN NaN -0.000930 NaN NaN -0.009567 0.019454 NaN
2005-08-03 -0.045271 NaN 0.010801 0.000871 -0.003290 NaN NaN 0.0469 NaN NaN -0.006943 NaN NaN 0.005324 -0.007057 NaN
2005-08-31 0.008821 NaN 0.007892 0.002410 0.020470 NaN NaN -0.3767 NaN NaN 0.002420 NaN NaN -0.003842 0.013888 NaN
2005-09-27 0.028851 NaN -0.004355 0.002619 0.012372 NaN NaN -0.5122 NaN NaN -0.000688 NaN NaN 0.020388 0.008387 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-11-30 -0.047987 -0.013439 0.011862 0.018969 -0.038204 0.008922 -0.000696 1.6405 0.011194 -0.002338 -0.024075 0.009247 0.017270 -0.025170 0.014000 0.002686
2022-12-28 0.017136 0.003181 0.011447 0.006698 0.002398 -0.004487 0.016413 -4.3861 -0.015289 0.005956 0.022461 -0.012614 0.025709 -0.023997 0.011885 0.002531
2023-01-25 -0.050958 -0.003026 0.033966 0.005789 -0.030022 0.012565 -0.028874 -0.7273 0.016171 0.004102 -0.023007 -0.002602 -0.014000 -0.009368 0.009310 0.001728
2023-02-22 0.020082 0.021498 0.011651 0.019872 0.000874 -0.002980 0.022189 -0.4369 -0.004838 0.000700 0.001410 0.001495 0.003643 0.011919 -0.007237 0.002778
2023-03-22 -0.011847 0.006434 -0.025383 -0.003288 -0.039426 -0.004457 -0.004162 0.6684 0.000329 -0.000871 0.002799 -0.003091 0.039701 -0.015427 0.016647 0.004708

232 rows × 16 columns

Some columns contain missing values. Let's check the proportion of missing values in each column :

In [32]:
# Null values in dataset

null = data_syst.isnull().sum().sort_values(ascending=False)
percent_missing = (data_syst.isnull().sum() / data_syst.shape[0] * 100).sort_values(ascending=False)

missing_data = pd.concat([null, percent_missing], axis=1, keys=['Total missing', 'Percent missing'])
missing_data = missing_data.rename(columns= { "index": " column name"})

missing_data.sort_values(by ='Total missing', ascending = False)
Out[32]:
Total missing Percent missing
Tail Hedge - SCV 147 63.362069
Long Rates Vol. 96 41.379310
Repo Carry 86 37.068966
Intraday Trend Following 35 15.086207
Short Weekly Tail Puts 35 15.086207
Strong vs. Weak Balance Sheets 34 14.655172
Global Equity Machine Learning 9 3.879310
Tail Hedge - Dynamic Put Ratio 9 3.879310
Global Quality Income vs. Index 0 0.000000
European Value vs. Index 0 0.000000
Commodity Congestion 0 0.000000
Multi-asset Trend 0 0.000000
Tail Hedge -SDV 0 0.000000
Put Spread 0 0.000000
FX Carry 0 0.000000
FX Value 0 0.000000

Many columns have too many missing values, so we can't fill in the missing data with sample means, medians or other rudimentary interpolations that will overwhelm out the signal of the data with noise. More, if those columns are removed, there's a risk of losing too much information. So, we will use probabilistic PCA which compute factors where some of the data are missing.

In [33]:
# Create Probabilistic PCA (PPCA) object
ppca = PPCA();

# Number of pricipal components
n_pc = 2

# Fit the PPCA model with principal components
ppca.fit(data=data_syst.values, d=n_pc, verbose=True)

# Obtain the principal components
component_mat = ppca.transform();
1.0
0.7729708684588203
0.9301835983744469
2.7491901090738775
0.2389130331871705
0.018428175901263777
0.04504531876524365
0.03674361976616114
0.023715858867782602
0.012813721429227765
0.005225934986834435
0.000559019916584913
0.001959939193413973
0.0030679093783592126
0.0033343649110932905
0.0031500720705690544
0.0027608113928707656
0.002309504496337711
0.0018721304818993811
0.001483922415887351
0.0011568400493260356
0.0008905228482347649
0.0006788107955162381
0.0005134281463505808
0.00038593174355594506
0.0002886403032626994
0.00021499143719405645
0.00015959557091260734
0.00011814353042938208
8.725560823674705e-05
In [34]:
variance_explained = ppca.var_exp
components = ppca.data
model_params = ppca.C
In [35]:
scores_syst_df = pd.DataFrame(
    index=data_syst.index,
    columns=[''.join(['PC', str(i + 1)]) for i in range(n_pc)],
    data=component_mat)

scores_syst_df
Out[35]:
PC1 PC2
2005-06-10 -0.885883 1.143596
2005-07-06 -0.316118 -0.200146
2005-08-03 1.175464 0.318211
2005-08-31 -0.413634 -0.317862
2005-09-27 -0.105973 -0.312105
... ... ...
2022-11-30 0.331661 2.103474
2022-12-28 -0.692903 -2.270541
2023-01-25 2.094534 2.072162
2023-02-22 -0.031694 -0.316743
2023-03-22 -0.501151 -0.945952

232 rows × 2 columns

In [36]:
# Compute the correlation of each asset with PC1 and PC2
correlation_syst_pc1 = data_syst.corrwith(scores_syst_df['PC1'])
correlation_syst_pc2 = data_syst.corrwith(scores_syst_df['PC2'])

# Create a DataFrame to display the correlations
correlation_syst_df = pd.DataFrame({
    'PC1': correlation_syst_pc1,
    'PC2': correlation_syst_pc2
}, index=data_syst.columns)

correlation_syst_df.style.format(formatter='{:+.2f}').map(func=lambda x: 'color:red' if x < 0. else 'color:blue')
Out[36]:
  PC1 PC2
Global Quality Income vs. Index -0.54 -0.38
Global Equity Machine Learning +0.14 -0.16
European Value vs. Index +0.67 +0.34
Commodity Congestion -0.17 +0.55
Multi-asset Trend -0.45 -0.25
Repo Carry -0.17 +0.71
Long Rates Vol. -0.48 -0.23
Tail Hedge -SDV -0.64 +0.55
Tail Hedge - SCV -0.69 +0.67
Tail Hedge - Dynamic Put Ratio -0.78 +0.34
Put Spread -0.62 -0.56
Intraday Trend Following -0.70 +0.36
Strong vs. Weak Balance Sheets -0.54 -0.29
FX Carry +0.60 +0.31
FX Value -0.18 +0.17
Short Weekly Tail Puts -0.17 +0.30
In [37]:
plt.figure(figsize=(10, 6))
plt.scatter(correlation_syst_df['PC1'], correlation_syst_df['PC2'], alpha=0.7)

# Ajouter les noms des points
for i in correlation_syst_df.index:
    plt.text(correlation_syst_df.loc[i, 'PC1'], correlation_syst_df.loc[i, 'PC2'], i)

# Ajouter des titres et des étiquettes
plt.title("Correlation with PC1 and PC2")
plt.xlabel("Correlation with PC1")
plt.ylabel("Correlation with PC2")
plt.grid(True)
plt.show()
No description has been provided for this image

5. Portfolio Optimization¶

Data
In [38]:
equity_data = pd.read_csv('equity_factor_performance_data.csv',
                            index_col=0, header=0,
                            parse_dates=True).astype(np.float64)
In [39]:
equity_data
Out[39]:
Growth Low Risk Low Size Momentum Value Quality Income
2018-09-03 3147.275589 2521.365122 4447.639970 7230.019236 5367.396587 2746.458678
2018-09-04 3147.869877 2517.854224 4423.624532 7240.434396 5339.050310 2744.511885
2018-09-05 3123.127074 2512.737537 4432.075319 7221.320382 5358.611721 2765.911701
2018-09-06 3106.574794 2518.676856 4399.732989 7182.130965 5311.798170 2752.442728
2018-09-07 3101.329324 2510.221028 4376.879336 7139.770025 5291.714564 2749.406337
... ... ... ... ... ... ...
2023-08-28 4722.945678 4272.219410 5360.124710 11363.308993 7459.215794 4465.448906
2023-08-29 4806.024404 4309.305708 5436.687835 11535.042889 7543.478208 4482.673495
2023-08-30 4841.395229 4326.786715 5465.827827 11598.645379 7544.416986 4483.233991
2023-08-31 4852.078911 4308.159090 5473.408710 11640.134272 7521.389497 4450.985502
2023-09-01 4884.347114 4317.863443 5501.348987 11709.032625 7570.535386 4448.521921

1305 rows × 6 columns

Next, we find the simple daily returns for each of the 12 assets using the pct_change() method, since our data object is a pandas DataFrame. We use simple returns since they have the property of being asset-additive, which is necessary since we need to compute portfolios returns:

In [40]:
# Compute daily simple returns
daily_returns = (
  equity_data.pct_change()
            .dropna(
              # Drop the first row since we have NaN's
              # The first date 2018-09-03 does not have a value since it is our cut-off date
              axis = 0,
              how = 'any',
              inplace = False
              )
)
# Examine the last 5 rows
daily_returns.tail(n = 5)
Out[40]:
Growth Low Risk Low Size Momentum Value Quality Income
2023-08-28 0.005015 0.006035 0.005886 0.006481 0.007407 0.003419
2023-08-29 0.017590 0.008681 0.014284 0.015113 0.011296 0.003857
2023-08-30 0.007360 0.004057 0.005360 0.005514 0.000124 0.000125
2023-08-31 0.002207 -0.004305 0.001387 0.003577 -0.003052 -0.007193
2023-09-01 0.006650 0.002253 0.005105 0.005919 0.006534 -0.000553

The simple daily returns may be visualized using line charts, density plots, and histograms, which are covered in my other post on visualizing asset data. Even though the visualizations in that post use the ggplot2 package in R, the plotnine package, or any other Python graphics librarires, can be employed to produce them in Python. For now, let us annualize the daily returns over the 5-year period from 2018-09-03 to 2023-09-01. We assume the number of trading days in a year is computed as follows:

$$365.25 \quad \text{(days on average per year)} \cdot \frac{5}{7} \quad \text{(proportion work days per week)} - 6 \quad \text{(weekday holidays)} - 3 \cdot \frac{5}{7} \quad \text{(fixed date holidays)} = 252.75 \approx 253$$
In [41]:
daily_returns.mean() * 253
Out[41]:
Growth            0.117797
Low Risk          0.122932
Low Size          0.084286
Momentum          0.118064
Value             0.103472
Quality Income    0.111360
dtype: float64

The annualized variance-covariance matrix of the returns can be computed using built-in pandas method cov():

In [42]:
daily_returns.cov() * 253
Out[42]:
Growth Low Risk Low Size Momentum Value Quality Income
Growth 0.064674 0.044142 0.062492 0.050407 0.056067 0.034156
Low Risk 0.044142 0.036917 0.042614 0.039387 0.040846 0.029625
Low Size 0.062492 0.042614 0.085696 0.053194 0.077119 0.043524
Momentum 0.050407 0.039387 0.053194 0.048724 0.051333 0.034334
Value 0.056067 0.040846 0.077119 0.051333 0.073120 0.043255
Quality Income 0.034156 0.029625 0.043524 0.034334 0.043255 0.035410

Minimize Risk given Levels of Return

$$\min _{\vec{w}} \sqrt{\vec{w}^T \hat{\Sigma} \vec{w}}$$

subject to $$ \begin{aligned} & \vec{w}^T \hat{\mu}=\overline{r}_P \\ & \vec{w}^T \vec{1}=1 \quad \text { (Full investment) } \\ & \overrightarrow{0} \leq \vec{w} \leq \vec{1} \quad \text { (Long only) } \end{aligned} $$

Maximize Return given Levels of Risk

$$\max _{\vec{w}} \vec{w}^T \hat{\mu}$$

subject to

$$\begin{aligned} & \vec{w}^T \hat{\Sigma} \vec{w}=\overline{\sigma}_P \\ & \vec{w}^T \overrightarrow{1}=1 \quad \text { (Full investment) } \\ & \overrightarrow{0} \leq \vec{w} \leq \overrightarrow{1} \quad \text { (Long only) }\end{aligned}$$

In absence of other constraints, the above model is loosely referred to as the "unconstrained" portfolio optimization model. Solving the mathematical model yields a set of optimal weights representing a set of optimal portfolios.

5.1 Monte Carlo Simulation¶

The first task is to simulate a random set of portfolios to visualize the risk-return profiles of our given set of assets. We use a for loop to simulate random vectors of asset weights, computing the expected portfolio return and standard deviation for each permutation of random weights. Again, we ensure that each random weight vector is subject to the long-positions-only and full-investment constraints.

Monte Carlo Simulation

The empty containers we instantiate are lists; they are mutable and so growing them will not be memory inefficient.

In [43]:
# instantiate empty list containers for returns and sd
list_portfolio_returns = []
list_portfolio_sd = []
# For loop to simulate 5000 random weight vectors (numpy array objects)
for p in range(5000):
  # Return random floats in the half-open interval [0.0, 1.0)
  weights = np.random.random(size = equity_data.shape[1])
  # Normalize to unity
  # The /= operator divides the array by the sum of the array and rebinds "weights" to the new object
  weights /= np.sum(weights)
  # Lists are mutable so growing will not be memory inefficient
  list_portfolio_returns.append(portfolio_returns(daily_returns, weights))
  list_portfolio_sd.append(portfolio_sd(daily_returns, weights))
  # Convert list to numpy arrays
  port_returns = np.array(object = list_portfolio_returns)
  port_sd = np.array(object = list_portfolio_sd)

Let us examine the simulation results. In particular, the highest and the lowest expected portfolio returns are as follows:

In [44]:
# Max expected return
max_expected_return = round(max(port_returns), 4)

# Min expected return
min_expected_return = round(min(port_returns), 4)

print(f'Highest expected portfolio returns : {max_expected_return}\nLowest expected portfolio returns : {min_expected_return}')
Highest expected portfolio returns : 0.1196
Lowest expected portfolio returns : 0.0967

On the other hand, the highest and lowest volatility measures are recorded as:

In [45]:
# Max sd
max_vol = round(max(port_sd), 4)

# Min sd
min_vol = round(min(port_sd), 4)

print(f'Highest volatility measure : {max_vol}\nLowest volatility measure : {min_vol}')
Highest volatility measure : 0.2671
Lowest volatility measure : 0.1909
In [158]:
sharpe_ratio = [r / sd for r, sd in zip(port_returns, port_sd)]  # Sharpe Ratio calculation

# Create a scatter plot
fig = px.scatter(
    x=port_sd, y=port_returns, color=sharpe_ratio,
    labels={'x': 'Portfolio Standard Deviation (Annualized)', 'y': 'Expected Portfolio Return (Annualized)'},
    color_continuous_scale='Viridis',  # You can choose any color scale
)

# Update layout
fig.update_layout(
    title='Mean-Standard Deviation Diagram',
    xaxis_tickformat='.2%',
    yaxis_tickformat='.2%',
)

# Add colorbar title
fig.update_layout(coloraxis_colorbar=dict(title="Sharpe Ratio"))

# Show the plot
fig.show()

Each point in the diagram above represents a permutation of expected-return-standard-deviation value pair. The points are color coded such that the magnitudes of the Sharpe ratios, defined as $SR = \frac{\mu_P - r_f}{\sigma_P}$, can be readily observed for each expected-return-standard-deviation pairing. For simplicity, we assume that $r_f \equiv 0$.

5.2 Optimal Portfolio¶

We will use dictionaries inside of a tuple to represent the constraints:

In [46]:
#We use an anonymous lambda function
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})

Next, the bound values for the weights:

In [47]:
# This creates 12 tuples of (0, 1), all of which exist within a container tuple
# We essentially create a sequence of (min, max) pairs
bounds = tuple(
  (0, 1) for w in weights
)

We also need to supply a starting list of weights, which essentially functions as an initial guess. For our purposes, this will be an equal weight array:

In [48]:
# Repeat the list with the value (1 / 12) 12 times, and convert list to array
equal_weights = np.array(
  [1 / equity_data.shape[1]] * equity_data.shape[1]
)
Minimum Variance Portfolio
In [49]:
# Minimize sd
min_sd_results = minimize(
  # Objective function
  fun = lambda weights: portfolio_sd(daily_returns, weights),
  # Initial guess, which is the equal weight array
  x0 = equal_weights,
  method = 'SLSQP',
  bounds = bounds,
  constraints = constraints
)
Minimum Variance Weights
In [50]:
optimal_weights_df = pd.DataFrame(min_sd_results.x.reshape(1, -1), columns=equity_data.columns, index=['Weights'])
optimal_weights_df
Out[50]:
Growth Low Risk Low Size Momentum Value Quality Income
Weights 2.832792e-17 0.434213 0.0 3.469447e-17 0.0 0.565787
Expected return (Minimum variance portfolio)
In [51]:
# Expected return
min_sd_port_return = portfolio_returns(daily_returns, min_sd_results["x"])
print(f'Expected return (Minimum variance portfolio) : {round(min_sd_port_return, 4)}')
Expected return (Minimum variance portfolio) : 0.1164
Standard deviation (Minimum variance portfolio)
In [52]:
# Standard deviation
min_sd_port_sd = portfolio_sd(daily_returns, min_sd_results["x"])
print(f'Standard deviation (Minimum variance portfolio) : {round(min_sd_port_sd, 4)}')
Standard deviation (Minimum variance portfolio) : 0.1813
Sharpe ratio (Minimum variance portfolio)
In [53]:
# Sharpe ratio
min_sd_port_sharpe = min_sd_port_return / min_sd_port_sd
print(f'Sharpe ratio (Minimum variance portfolio) : {round(min_sd_port_sharpe, 4)}')
Sharpe ratio (Minimum variance portfolio) : 0.6421
Portfolio returns (Minimum variance portfolio)
In [54]:
# Calculate the portfolio returns
min_var_returns = daily_returns.dot(min_sd_results.x)

# Calculate the cumulative sum of the portfolio returns
cumulative_portfolio_returns = min_var_returns.cumsum()

print(f'Portfolio returns :\n{min_var_returns}')
Portfolio returns :
2018-09-04   -0.001006
2018-09-05    0.003529
2018-09-06   -0.001729
2018-09-07   -0.002082
2018-09-10    0.003524
                ...   
2023-08-28    0.004555
2023-08-29    0.005952
2023-08-30    0.001832
2023-08-31   -0.005939
2023-09-01    0.000665
Length: 1304, dtype: float64
In [55]:
print(f'Cumulative sum of the portfolio returns :\n{cumulative_portfolio_returns}')
Cumulative sum of the portfolio returns :
2018-09-04   -0.001006
2018-09-05    0.002524
2018-09-06    0.000795
2018-09-07   -0.001287
2018-09-10    0.002237
                ...   
2023-08-28    0.597355
2023-08-29    0.603306
2023-08-30    0.605138
2023-08-31    0.599199
2023-09-01    0.599864
Length: 1304, dtype: float64
In [56]:
cumulative_portfolio_returns.plot(figsize=(10, 6), title='Cumulative Portfolio Returns')
plt.xlabel('Date')
plt.ylabel('Returns')
plt.grid(True)
plt.show()
No description has been provided for this image

5.3 Montly returns¶

5.3 Long-Only¶

In [57]:
daily_returns = daily_returns.iloc[1:]

# cumulative compounds
monthly_returns = daily_returns.resample('M').apply(lambda x: (1 + x).prod() - 1)

monthly_returns.head()
Out[57]:
Growth Low Risk Low Size Momentum Value Quality Income
2018-09-30 -0.003543 0.008228 -0.002006 -0.006172 -0.004550 0.015995
2018-10-31 -0.082113 -0.070511 -0.075950 -0.083624 -0.078524 -0.066693
2018-11-30 0.024886 0.039180 0.021326 0.011756 0.019698 0.047711
2018-12-31 -0.083202 -0.075134 -0.129404 -0.090721 -0.122200 -0.083466
2019-01-31 0.103440 0.072108 0.122880 0.089383 0.126459 0.071022
In [112]:
long_only_min_variance_portfolio_weights = optimize_long_only_min_variance_portfolio(monthly_returns)
long_only_min_variance_portfolio_weights.head()
Out[112]:
Growth Low Risk Low Size Momentum Value Quality Income
2018-09-30 0.166667 0.166667 0.166667 0.166667 0.166667 0.166667
2018-10-31 0.166667 0.166667 0.166667 0.166667 0.166667 0.166667
2018-11-30 0.166665 0.141698 0.212719 0.166665 0.195608 0.116645
2018-12-31 0.166663 0.116725 0.237718 0.216661 0.195606 0.066626
2019-01-31 0.208338 0.116726 0.187742 0.249995 0.170571 0.066627
In [120]:
# Calculate and display the PnL
# Shift the weights, as returns are realized in the next month
long_only_portfolio_returns = (monthly_returns * long_only_min_variance_portfolio_weights.shift(1)).sum(axis=1) * 12
monthly_pnl_long_only = long_only_portfolio_returns.cumsum()
monthly_pnl_long_only
Out[120]:
2018-09-30    0.000000
2018-10-31   -0.914830
2018-11-30   -0.585717
2018-12-31   -1.795308
2019-01-31   -0.550894
                ...   
2023-05-31    5.862406
2023-06-30    6.613881
2023-07-31    6.962976
2023-08-31    6.705498
2023-09-30    6.748303
Freq: M, Length: 61, dtype: float64
In [121]:
# Plot the PnL
monthly_pnl_long_only.plot(figsize=(10, 6), title='Monthly Portfolio PnL')
plt.xlabel('Date')
plt.ylabel('PnL')
plt.grid(True)
plt.show()
No description has been provided for this image

5.3 Long-Short¶

We will perform the same monthly portfolio optimization, but this time we are permitted to take short positions and must ensure that the Gross Market Value (GMV) of the portfolio is precisely 1.

In [152]:
monthly_returns = daily_returns.resample('M').apply(lambda x: (1 + x).prod() - 1)
long_short_min_variance_portfolio_weights = optimize_long_short_min_variance_portfolio(monthly_returns)
long_short_min_variance_portfolio_weights
Out[152]:
Growth Low Risk Low Size Momentum Value Quality Income
2018-09-30 0.166667 0.166667 0.166667 0.166667 0.166667 0.166667
2018-10-31 0.141672 0.141672 0.141672 0.141672 0.141672 0.141672
2018-11-30 0.091683 0.091680 0.141672 0.141672 0.141672 0.091653
2018-12-31 0.041683 0.041680 0.141672 0.141672 0.141672 0.041653
2019-01-31 0.041683 0.041680 0.091699 0.141672 0.091671 -0.008376
... ... ... ... ... ... ...
2023-05-31 0.000692 0.002484 -0.003848 0.002103 -0.002845 0.005976
2023-06-30 0.000893 0.002422 -0.003860 0.002131 -0.002873 0.005766
2023-07-31 0.001031 0.002403 -0.003883 0.002133 -0.002899 0.005716
2023-08-31 0.001116 0.002416 -0.003925 0.002149 -0.002926 0.005717
2023-09-30 0.001155 0.002454 -0.003973 0.002176 -0.002958 0.005727

61 rows × 6 columns

In [153]:
# Calculate and display the PnL
# Shift the weights, as returns are realized in the next month
long_short_portfolio_returns = (monthly_returns * long_short_min_variance_portfolio_weights.shift(1)).sum(axis=1) * 12
monthly_pnl_long_short = long_short_portfolio_returns.cumsum()
monthly_pnl_long_short
Out[153]:
2018-09-30    0.000000
2018-10-31   -0.914830
2018-11-30   -0.635074
2018-12-31   -1.483045
2019-01-31   -0.783892
                ...   
2023-05-31   -0.527225
2023-06-30   -0.527232
2023-07-31   -0.527726
2023-08-31   -0.527783
2023-09-30   -0.527984
Freq: M, Length: 61, dtype: float64